Question 1

1.1

USAboundaries::us_states
## function (map_date = NULL, resolution = c("low", "high"), states = NULL) 
## {
##     resolution <- match.arg(resolution)
##     if (is.null(map_date)) {
##         if (resolution == "low") {
##             shp <- USAboundaries::states_contemporary_lores
##         }
##         else if (resolution == "high") {
##             check_data_package()
##             shp <- USAboundariesData::states_contemporary_hires
##         }
##         shp <- filter_by_states(shp, states)
##     }
##     else {
##         map_date <- as.Date(map_date)
##         stopifnot(as.Date("1783-09-03") <= map_date, map_date <= 
##             as.Date("2000-12-31"))
##         check_data_package()
##         if (resolution == "low") {
##             shp <- USAboundariesData::states_historical_lores
##         }
##         else if (resolution == "high") {
##             shp <- USAboundariesData::states_historical_hires
##         }
##         shp <- filter_by_date(shp, map_date)
##         shp <- filter_by_states(shp, states)
##     }
##     shp
## }
## <bytecode: 0x7ff13456f8b0>
## <environment: namespace:USAboundaries>
conus = USAboundaries::us_states() %>%
  filter(!state_name %in% c("Puerto Rico",
                            "Alaska",
                            "Hawaii"))
counties = USAboundaries::us_counties() %>%
  st_as_sf(counties) %>%
  filter(!state_name %in% c("Puerto Rico",
                            "Alaska",
                            "Hawaii")) %>%
  st_transform(counties, crs = 5070)

1.2

county_centroid = st_centroid(counties) %>%
  st_union()

1.3

vor_tess = st_voronoi(county_centroid) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())
tri_tess = st_triangulate(county_centroid) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())
gridded_counties = st_make_grid(county_centroid, n = 70, square = T) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())
hex_counties = st_make_grid(county_centroid, n=70, square = F) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id=1:n())

1.4

vor_tess = st_intersection(vor_tess, st_union(counties))
tri_tess = st_intersection(tri_tess, st_union(counties))
gridded_counties = st_intersection(gridded_counties, st_union(counties))
hex_counties = st_intersection(hex_counties, st_union(counties))

1.5

counties_simp = rmapshaper::ms_simplify(counties, keep = .005)
mapview::npts(counties)
## [1] 51976
mapview::npts(counties_simp)
## [1] 21023

Roughly 50% of the points were removed. This makes the edge of the border less accurate, but makes the computations faster.

1.6

plot_tess = function(sf_obj, title){
  ggplot()+
    geom_sf(data = sf_obj, fill = "white", col = "navy", size = .2)+
    theme_void() +
    labs(title = title, caption = paste("This tessellation has:", nrow(sf_obj), "features." ))
  }

1.7

plot_tess(vor_tess, "Voronoi Tessellation")

plot_tess(tri_tess, "Triangulated Tessellation")

plot_tess(gridded_counties, "Gridded Coverage")

plot_tess(hex_counties, "Hexagonal Coverage")

plot_tess(counties_simp, "Original")

Question 2

2.1

tess_sum = function(sf_obj, text){
  area = st_area(sf_obj) %>%
  units::set_units("km^2") %>%
  units::drop_units()
data.frame(num_features = nrow(sf_obj),
           mean_area = mean(area),
           sd_area = sd(area),
           total_area = sum(area),
           text = text
)
}

2.2

vor_sum_tess = tess_sum(vor_tess, "Voronoi")
tri_sum_tess = tess_sum(tri_tess, "Triangulation")
hex_sum = tess_sum(hex_counties, "Hexagon")
grid_sum = tess_sum(gridded_counties, "Grid")

2.3

summary_tess = bind_rows(
  tess_sum(counties_simp, "Original Counties"),
  tess_sum(tri_tess, "Triangulation"),
  tess_sum(vor_tess, "Voronoi"),
  tess_sum(hex_counties, "Hexagon"),
  tess_sum(gridded_counties, "Grid")
)

2.4

knitr::kable(summary_tess,
             caption = "Five Tessellation summaries",
             col.names = c("Number of Features", "Mean Area", "Standard Deviation of Features", "Total Area", "Text"),
             format.args = list(big.marks = ","))
Five Tessellation summaries
Number of Features Mean Area Standard Deviation of Features Total Area Text
3069 2539.396 3455.0175 7793407 Original Counties
6196 1251.808 1575.7996 7756200 Triangulation
3108 2521.745 2887.1397 7837583 Voronoi
2381 3290.644 835.6690 7835024 Hexagon
3292 2374.481 559.3614 7816790 Grid

2.5

For each of the 5 tessellations, the area and number of polygons changed. This will cause the MAUP to change. The total area was largest with the grid coverage tessellation.The triangulation tessellation had the most number of polygons, and the hexagonal coverage had the about half the number of polygons.

Question 3

3.1

dams <- read_excel("~/github/geog-13-labs/data/NID2019_U.xlsx")
dams_sf = dams %>%
  filter(!is.na(LONGITUDE), !is.na(LATITUDE)) %>%
  st_as_sf(coords = c("LONGITUDE","LATITUDE"), crs = 4326) %>%
  st_transform(5070)

3.2

pip_func = function(points, polygon, id){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(get('id')) %>%
    setNames(c(id, "n")) %>%
    left_join(polygon, by = id) %>%
    st_as_sf()
}
pip_func_county_simp = function(points, polygon, id){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(get('geoid')) %>%
    setNames(c(id, "n")) %>%
    left_join(polygon, by = id) %>%
    st_as_sf()
}

3.3

vor_pip = pip_func(dams_sf, vor_tess, "id")
tri_pip = pip_func(dams_sf, tri_tess, "id")
gridded_pip = pip_func(dams_sf, gridded_counties, "id")
hex_pip = pip_func(dams_sf, hex_counties, "id")
counties_simp_pip = pip_func_county_simp(dams_sf, counties_simp, "geoid")

3.4

plot_pip = function(sf_obj, title){
  ggplot()+
    geom_sf(data = sf_obj, aes(fill = log(n)))+
    scale_fill_viridis_c()+
    theme_void() +
    labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))
}

3.5

plot_pip(vor_pip, "Voronoi")

plot_pip(tri_pip, "triangulation")

plot_pip(gridded_pip, "gridded")

plot_pip(hex_pip, "hexagonal")

plot_pip(counties_simp_pip, "original")

3.6

The more polygons each tessellation had, the greater number of dams there were in each section. Moving forward, I will be using the hexagonal tessellation.

Question 4

4.1

dam_freq = strsplit(dams_sf$PURPOSES, split = "") %>%
     unlist() %>%
     table() %>%
     as.data.frame() %>%
     setNames(c("abbr", "count"))
r_grep = grepl("R", dams_sf$PURPOSES[1:20])
s_grep = grepl("S", dams_sf$PURPOSES[1:20])
h_grep = grepl("H", dams_sf$PURPOSES[1:20])
d_grep = grepl("D", dams_sf$PURPOSES[1:20])

4.2

plot_pip = function(sf_obj, title){
  ggplot()+
    geom_sf(data = sf_obj, aes(fill = log(n)))+
    scale_fill_viridis_c()+
    theme_void() +
    labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))+
    gghighlight::gghighlight(n>=(mean(n)+sd(n)))
}
rec = dams_sf %>% 
  filter(grepl("R", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))
s = dams_sf %>% 
  filter(grepl("S", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))
h = dams_sf %>% 
  filter(grepl("H", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))
d = dams_sf %>% 
  filter(grepl("D", PURPOSES)) %>% 
  pip_func(vor_tess, "id") %>% 
  plot_pip(vor_tess)+
  gghighlight::gghighlight(n >= (mean(n)+sd(n)))
rec

s

h

d

4.3

The dams found align with major lakes and rivers, such as the Mississippi. Therefore, the geographic distribution does “make sense”. I feel that the original tessellation would provide better results, but the hexagonal provides results closest to the original.